Image Analysis 2 code

packages <- c("tidyverse","imager","readxl","wesanderson","RColorBrewer")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(imager)
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Attaching package: 'imager'
## 
## The following object is masked from 'package:magrittr':
## 
##     add
## 
## The following object is masked from 'package:stringr':
## 
##     boundary
## 
## The following object is masked from 'package:dplyr':
## 
##     where
## 
## The following object is masked from 'package:tidyr':
## 
##     fill
## 
## The following objects are masked from 'package:stats':
## 
##     convolve, spectrum
## 
## The following object is masked from 'package:graphics':
## 
##     frame
## 
## The following object is masked from 'package:base':
## 
##     save.image
library(readxl)
library(wesanderson)
library(RColorBrewer)
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
df<- read.csv("~/Data Science/R/image-analysis-2-final-AdamWalters1/data/image-analysis-data.csv")
summary(df)
##     imageId        mountain           tree          prettySky     
##  Min.   : 1.0   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 8.5   1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.0000  
##  Median :16.0   Median :0.0000   Median :1.0000   Median :1.0000  
##  Mean   :16.0   Mean   :0.3226   Mean   :0.8387   Mean   :0.5161  
##  3rd Qu.:23.5   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :31.0   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##      person           child            animal         virginiaTech   
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :1.0000   Median :0.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.5484   Mean   :0.2258   Mean   :0.09677   Mean   :0.3548  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.0000  
##       food   entertainment         city           country      
##  Min.   :0   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0   Mean   :0.3548   Mean   :0.3871   Mean   :0.2903  
##  3rd Qu.:0   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :0   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##     building     
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :1.0000  
##  Mean   :0.7097  
##  3rd Qu.:1.0000  
##  Max.   :1.0000
numFeatures <-colSums(df[,-1])
listFeatures <- names(df)[-1]

orderFeatures <- order(numFeatures,decreasing = TRUE)
listFeatures[orderFeatures]
##  [1] "tree"          "building"      "person"        "prettySky"    
##  [5] "city"          "virginiaTech"  "entertainment" "mountain"     
##  [9] "country"       "child"         "animal"        "food"
barplot(numFeatures, main = "Feature Popularity", xlab = "Features", ylab = "Count")

barplot(numFeatures, main = "Feature Popularity", xlab = "Count", ylab = "Features", horiz = TRUE, col = "lightblue")

features = data.frame(Feature = listFeatures, Count = numFeatures)
ggplot(features,aes(x=reorder(Feature, Count), y=Count,fill = Feature))+
       geom_bar(stat = "identity")+
         coord_flip()+
         scale_fill_brewer(palette = "Set3")+
         theme_minimal()+
         theme(legend.position="none")+
         xlab("Count")+
         ylab("Feature")+
         ggtitle("Features Bases on Count")

bb1 <- load.image("images/bb1.jpeg")
plot(bb1)

# Sum the values in each row (excluding the first column)
image_sums <- rowSums(df[, -1])

# Find the index of the image with the highest sum
image_with_most_features <- which.max(image_sums)

# Get the corresponding image ID
image_id_with_most_features <- df$imageId[image_with_most_features]
image_id_with_most_features
## [1] 10
listImages <- df[,1]
orderImages <- order(image_sums, decreasing = TRUE)
orderImages
##  [1] 10 11 28  3  5 23 25 29 31  2  6  7  9 14 26 27  1  8 12 13 15 17 18 20 22
## [26] 30 21  4 16 19 24
top_5_images <- orderImages[1:5]

# Get the corresponding image IDs
top_5_image_ids <- df$imageId[top_5_images]
top_5_image_ids
## [1] 10 11 28  3  5
for (image_id in top_5_image_ids) {
  bb <- paste0("images/bb", image_id, ".jpeg")
  image <- load.image(bb)
  plot(image)
}

cbind(image=orderImages,count=image_sums[orderImages])[1:5,]
##      image count
## [1,]    10     8
## [2,]    11     7
## [3,]    28     7
## [4,]     3     6
## [5,]     5     6

Image Analysis 3

dat2 <- df[,-1]
dat <- df
km2<-kmeans(dat2,2,nstart=20)

cbind(dat[,-1],km2$cluster)
##    mountain tree prettySky person child animal virginiaTech food entertainment
## 1         0    1         0      1     0      0            0    0             0
## 2         1    1         1      0     0      0            0    0             0
## 3         0    1         0      1     0      1            0    0             1
## 4         0    0         0      0     0      0            0    0             0
## 5         0    1         1      1     0      0            1    0             0
## 6         1    1         1      0     0      0            0    0             0
## 7         0    1         1      0     0      0            1    0             0
## 8         1    1         0      0     0      0            0    0             0
## 9         1    1         1      1     0      1            0    0             0
## 10        1    1         0      1     1      0            1    0             1
## 11        0    1         1      1     1      0            1    0             1
## 12        0    1         1      0     0      0            1    0             0
## 13        0    1         1      0     0      0            0    0             1
## 14        0    1         0      1     0      0            1    0             1
## 15        1    1         1      0     0      0            0    0             0
## 16        0    1         0      0     0      0            0    0             0
## 17        0    1         0      1     0      0            0    0             0
## 18        1    1         1      0     0      0            0    0             0
## 19        0    0         0      1     0      0            1    0             0
## 20        0    1         0      1     0      0            0    0             1
## 21        0    1         0      0     0      0            1    0             0
## 22        0    1         1      0     0      0            1    0             0
## 23        0    0         1      0     1      0            1    0             1
## 24        0    0         0      1     0      0            1    0             0
## 25        0    1         0      1     1      0            0    0             1
## 26        1    1         1      1     0      0            0    0             0
## 27        0    1         0      1     1      0            0    0             1
## 28        0    1         1      1     1      0            0    0             1
## 29        0    1         0      1     1      1            0    0             1
## 30        1    0         1      0     0      0            0    0             0
## 31        1    1         1      1     0      0            0    0             0
##    city country building km2$cluster
## 1     1       0        1           1
## 2     0       1        1           2
## 3     1       0        1           1
## 4     1       0        1           1
## 5     1       0        1           1
## 6     1       0        1           2
## 7     1       0        1           1
## 8     1       0        1           1
## 9     0       0        0           2
## 10    1       0        1           1
## 11    0       0        1           1
## 12    0       0        1           1
## 13    0       1        0           2
## 14    0       1        0           1
## 15    0       1        0           2
## 16    0       0        1           1
## 17    1       0        1           1
## 18    0       1        0           2
## 19    0       0        0           1
## 20    0       0        1           1
## 21    0       0        1           1
## 22    0       0        1           1
## 23    1       0        1           1
## 24    0       0        0           1
## 25    1       0        1           1
## 26    0       1        0           2
## 27    0       0        1           1
## 28    1       0        1           1
## 29    0       1        0           1
## 30    0       1        1           2
## 31    0       1        1           2
clId1<-dat[km2$cluster==1,1]
clId2<-dat[km2$cluster==2,1]

clImages1<-sapply(clId1,function(id)paste("images/bb",id,".jpeg",sep=""))
clImages2<-sapply(clId2,function(id)paste("images/bb",id,".jpeg",sep=""))

par(mfrow=c(2,3),mar=c(2.1,4.1,1.1,1.1))
plot(load.image(clImages1[1]),main="cl1")
plot(load.image(clImages1[2]),main="cl1")
plot(load.image(clImages1[3]),main="cl1")
plot(load.image(clImages2[1]),main="cl2")
plot(load.image(clImages2[2]),main="cl2")
plot(load.image(clImages2[3]),main="cl2")

elbow <- function(setDat,maxK){
  prop<-rep(0,maxK)
  for(k in 1:maxK){
    km<-kmeans(setDat, centers=k,nstart = 20)
    prop[k]<-km$betweenss/km$totss
  }
  return(prop)
}

elbow_<-elbow(dat2,15)
plot(elbow_)

km5<-kmeans(dat2,5,nstart=20)

clId1<-dat[km5$cluster==1,1]
clId2<-dat[km5$cluster==2,1]
clId3<-dat[km5$cluster==3,1]
clId4<-dat[km5$cluster==4,1]
clId5<-dat[km5$cluster==5,1]

clImages1<-sapply(clId1,function(id)paste("images/bb",id,".jpeg",sep=""))
clImages2<-sapply(clId2,function(id)paste("images/bb",id,".jpeg",sep=""))
clImages3<-sapply(clId3,function(id)paste("images/bb",id,".jpeg",sep=""))
clImages4<-sapply(clId4,function(id)paste("images/bb",id,".jpeg",sep=""))
clImages5<-sapply(clId5,function(id)paste("images/bb",id,".jpeg",sep=""))

par(mfrow=c(2,3),mar=c(2.1,4.1,1.1,1.1))
plot(load.image(clImages1[1]),main="cl1")
plot(load.image(clImages1[2]),main="cl1")
plot(load.image(clImages2[1]),main="cl2")
plot(load.image(clImages2[2]),main="cl2")
plot(load.image(clImages3[1]),main="cl3")
plot(load.image(clImages3[2]),main="cl3")

plot(load.image(clImages4[1]),main="cl4")
plot(load.image(clImages4[2]),main="cl4")
plot(load.image(clImages5[1]),main="cl5")
plot(load.image(clImages5[2]),main="cl5")

Image Analysis 4!!

k<-5
cols <- rainbow(k)

temp<-as.data.frame(km5$center)
names(temp)<-paste("V",1:dim(temp)[[2]],sep="")
parcoord(temp,col=cols,lwd=2,ylim=c(0,2),var.label = FALSE)
legend("topright",horiz = FALSE, legend=as.character(1:k),col=cols,
       lty=rep(1,k),lwd=2,cex=.5,bty="n",ncol=2,title="cluster")
legend("topleft",horiz = FALSE, ncol=3,
       legend = paste(names(temp),names(dat[,-1])),cex=.5)

summary(km5$centers)
##     mountain           tree          prettySky          person      
##  Min.   :0.0000   Min.   :0.3333   Min.   :0.0000   Min.   :0.1667  
##  1st Qu.:0.0000   1st Qu.:0.8333   1st Qu.:0.1667   1st Qu.:0.3333  
##  Median :0.1250   Median :0.8333   Median :0.2500   Median :0.3750  
##  Mean   :0.2667   Mean   :0.7750   Mean   :0.4500   Mean   :0.5750  
##  3rd Qu.:0.3333   3rd Qu.:0.8750   3rd Qu.:0.8333   3rd Qu.:1.0000  
##  Max.   :0.8750   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##      child            animal       virginiaTech       food   entertainment   
##  Min.   :0.0000   Min.   :0.000   Min.   :0.00   Min.   :0   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.00   1st Qu.:0   1st Qu.:0.1250  
##  Median :0.0000   Median :0.000   Median :0.25   Median :0   Median :0.1667  
##  Mean   :0.1833   Mean   :0.075   Mean   :0.45   Mean   :0   Mean   :0.3250  
##  3rd Qu.:0.1667   3rd Qu.:0.125   3rd Qu.:1.00   3rd Qu.:0   3rd Qu.:0.3333  
##  Max.   :0.7500   Max.   :0.250   Max.   :1.00   Max.   :0   Max.   :1.0000  
##       city           country          building    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.375  
##  Median :0.5000   Median :0.1250   Median :0.875  
##  Mean   :0.3667   Mean   :0.2667   Mean   :0.650  
##  3rd Qu.:0.5000   3rd Qu.:0.3333   3rd Qu.:1.000  
##  Max.   :0.8333   Max.   :0.8750   Max.   :1.000

MDS

  1. -> 7.
 #for continuous data
distPrev<-dist(dat[,-1])
mdsPrev<-cmdscale(distPrev)
plot(mdsPrev, col = cols[km5$cluster],xlab="MDS Coord 1",
     ylab="MDS coord 2", main="Original Distance Measure")
legend("bottomright",legend=1:6,pch=1,col=cols,cex=.5)

# for binary data (including image id's)
image_ids <- dat[, 1]
distNew<-dist(dat[,-1],method="binary")
mdsNew<-cmdscale(distNew)
plot(mdsNew, col = cols[km5$cluster],xlab="MDS Coord 1",
     ylab="MDS coord 2", main="Original Distance Measure")
legend("bottomright",legend=1:6,pch=1,col=cols,cex=.5)
text(mdsNew, labels=image_ids, pos=1, cex=0.7, col=cols[km5$cluster])

means <- tapply(dat$tree, km5$cluster, mean)

# Create a bar graph for mean variable values across different clusters
barplot(means, 
        main = "Bar Graph for Different Clusters",
        xlab = "Cluster",
        ylab = "trees")